<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    
<meta charset="UTF-8">
<title>Fielddata Filtering | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="docvalues-and-fielddata.html" title="Doc Values and Fielddata">
<link rel="prev" href="_limiting_memory_usage.html" title="Limiting Memory Usage">
<link rel="next" href="preload-fielddata.html" title="Preloading Fielddata">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      (function(){var g=function(e,h,f,g){
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="aggregations.html">Aggregations</a></span>
»
<span class="breadcrumb-link"><a href="docvalues-and-fielddata.html">Doc Values and Fielddata</a></span>
»
<span class="breadcrumb-node">Fielddata Filtering</span>
</div>
<div class="navheader">
<span class="prev">
<a href="_limiting_memory_usage.html">« Limiting Memory Usage</a>
</span>
<span class="next">
<a href="preload-fielddata.html">Preloading Fielddata »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="_fielddata_filtering"></a>Fielddata Filtering<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/300_Aggregations/105_filtering.asciidoc">edit</a>
</h2>
</div></div></div>
<p>Imagine that you are running a website that allows users to listen to their
favorite songs.  To make it easier for them to manage their music library,
users can tag songs with whatever tags make sense to them.  You will end up
with a lot of tracks tagged with <code class="literal">rock</code>, <code class="literal">hiphop</code>, and <code class="literal">electronica</code>, but
also with some tracks tagged with <code class="literal">my_16th_birthday_favorite_anthem</code>.</p>
<p>Now imagine that you want to show users the most popular three tags for each
song.  It is highly likely that tags like <code class="literal">rock</code> will show up in the top
three, but <code class="literal">my_16th_birthday_favorite_anthem</code> is very unlikely to make the
grade.  However, in order to calculate the most popular tags, you have been
forced to load all of these one-off terms into memory.</p>
<p>Thanks to fielddata filtering, we can take control of this situation.  We
<em>know</em> that we’re interested in only the most popular terms, so we can simply
avoid loading any terms that fall into the less interesting long tail:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /music/_mapping/song
{
  "properties": {
    "tag": {
      "type": "string",
      "fielddata": { <a id="CO221-1"></a><i class="conum" data-value="1"></i>
        "filter": {
          "frequency": { <a id="CO221-2"></a><i class="conum" data-value="2"></i>
            "min":              0.01, <a id="CO221-3"></a><i class="conum" data-value="3"></i>
            "min_segment_size": 500  <a id="CO221-4"></a><i class="conum" data-value="4"></i>
          }
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO221-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">fielddata</code> key allows us to configure how fielddata is handled for this field.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO221-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">frequency</code> filter allows us to filter fielddata loading based on term frequencies.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO221-3"><i class="conum" data-value="3"></i></a></p>
</td>
<td align="left" valign="top">
<p>Load only terms that occur in at least 1% of documents in this segment.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO221-4"><i class="conum" data-value="4"></i></a></p>
</td>
<td align="left" valign="top">
<p>Ignore any segments that have fewer than 500 documents.</p>
</td>
</tr>
</table>
</div>
<p>With this mapping in place, only terms that appear in at least 1% of the
documents <em>in that segment</em> will be loaded into memory. You can also specify a
<code class="literal">max</code> term frequency, which could be used to exclude terms that are <em>too</em>
common, such as <a class="xref" href="stopwords.html" title="Stopwords: Performance Versus Precision">stopwords</a>.</p>
<p>Term frequencies, in this case, are calculated per segment.  This is a
limitation of the implementation: fielddata is loaded per segment, and at
that point the only term frequencies that are visible are the frequencies for
that segment.  However, this limitation has interesting properties: it
allows newly popular terms to rise to the top quickly.</p>
<p>Let’s say that a new genre of song becomes popular one day.  You would like to
include the tag for this new genre in the most popular list, but if you were
relying on term frequencies calculated across the whole index, you would have
to wait for the new tag to become as popular as <code class="literal">rock</code> and <code class="literal">electronica</code>.
Because of the way frequency filtering is implemented, the newly added tag
will quickly show up as a high-frequency tag within new segments, so will
quickly float to the top.</p>
<p>The <code class="literal">min_segment_size</code> parameter tells Elasticsearch to ignore segments below
a certain size.  If a segment holds only a few documents, the term frequencies
are too coarse to have any meaning.  Small segments will soon be merged into
bigger segments, which will then be big enough to take into account.</p>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>Filtering terms by frequency is not the only option. You can also decide to
load only those terms that match a regular expression.  For instance, you
could use a <code class="literal">regex</code> filter on tweets to load only hashtags into memory — terms the start with a <code class="literal">#</code>.  This assumes that you are using an analyzer that
preserves punctuation, like the <code class="literal">whitespace</code> analyzer.</p>
</div>
</div>
<p>Fielddata filtering can have a <em>massive</em> impact on memory usage.  The
trade-off is fairly obvious: you are essentially ignoring data.  But for many
applications, the trade-off is reasonable since the data is not being used
anyway.  The memory savings is often more important than including a large and
relatively useless long tail of terms.</p>
</div>
<div class="navfooter">
<span class="prev">
<a href="_limiting_memory_usage.html">« Limiting Memory Usage</a>
</span>
<span class="next">
<a href="preload-fielddata.html">Preloading Fielddata »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
